#!/usr/bin/env python3

from pathlib import Path
from typing import List, Set
from glob import glob
from shutil import rmtree

import os
import re

# Repository root: two directory levels above this script's resolved location.
ZYDIS_ROOT = Path(__file__).resolve().parent.parent

# Directories searched when resolving includes for the amalgamated header.
PUBLIC_INCLUDE_PATHS = [
    ZYDIS_ROOT.joinpath('include'),
    ZYDIS_ROOT.joinpath('dependencies', 'zycore', 'include'),
]

# Extra directory searched when inlining headers referenced by source files.
INTERNAL_INCLUDE_PATHS = [ZYDIS_ROOT.joinpath('src')]

# Matches an angle-bracket `#include` line whose path starts with `Zy` or
# `Generated`; group 1 captures the path between the brackets.
INCLUDE_REGEXP = re.compile(r'^#\s*include\s*<((?:Zy|Generated).*)>\s*$')

# Directory that receives the generated files.
OUTPUT_DIR = ZYDIS_ROOT.joinpath('amalgamated-dist')

# Lines placed at the very top of every generated file.
FILE_HEADER = [
    '// DO NOT EDIT. This file is auto-generated by `amalgamate.py`.',
    '',
]


# Python versions before 3.10 don't have the root_dir argument for glob, so we 
# crudely emulate it here.
def glob_in_dir(
    pattern: str,
    root_dir: Path,
):
    """Yield the paths below ``root_dir`` that match the glob ``pattern``.

    The pattern is evaluated with ``recursive=True`` relative to the resolved
    ``root_dir``, and every yielded path is that resolved directory joined
    with the match.

    The working directory is only switched while the glob itself runs. All
    matches are collected first and the original directory is restored
    before the first path is yielded, so code consuming the generator never
    runs with a changed working directory, and abandoning the generator
    early cannot leave the process stranded in ``root_dir``.

    Matches are yielded in sorted order because ``glob`` returns them in
    arbitrary order; sorting keeps the result stable between runs.
    """
    root_dir = root_dir.resolve()
    cwd = os.getcwd()
    os.chdir(root_dir)
    try:
        matches = sorted(glob(pattern, recursive=True))
    finally:
        # Restore the caller's working directory even if globbing fails.
        os.chdir(cwd)

    for match in matches:
        yield root_dir / match


def find_include_path(
    include: str,
    search_paths: List[Path],
) -> Path:
    """Resolve ``include`` against ``search_paths`` and return its location.

    The search directories are tried in the given order and the first one
    containing an existing ``include`` entry wins. The result is returned as
    an absolute path.

    Raises:
        FileNotFoundError: If no search directory contains ``include``.
    """
    candidates = (directory / include for directory in search_paths)
    located = next((c for c in candidates if c.exists()), None)
    if located is None:
        raise FileNotFoundError(f'can\'t find header: {include}')
    return located.absolute()


def merge_headers(
    *,
    header: str,
    search_paths: List[Path],
    covered_headers: Set[str],
    stack: List[str],
) -> List[str]:
    """Return the lines of ``header`` with all matching includes inlined.

    Args:
        header: Include path of the header to emit (for example
            ``'Zydis/Zydis.h'``), resolved against ``search_paths``.
        search_paths: Directories searched, in order, for ``header`` and for
            every header it includes.
        covered_headers: Include paths that have already been emitted. The
            set is updated in place, so sharing it between calls guarantees
            that each header is emitted at most once.
        stack: Include paths of the headers that led to this one, outermost
            first. Only used for the explanatory comment in the output.

    Returns:
        The output lines: a comment banner naming the header (and its include
        stack, if any) followed by the header's lines with trailing
        whitespace stripped. Every line matching ``INCLUDE_REGEXP`` is
        replaced by the merged contents of the header it names, or by nothing
        if that header was already covered. An already covered ``header``
        yields an empty list.

    Raises:
        FileNotFoundError: If a not yet covered header cannot be found in
            ``search_paths``.
    """
    # Already emitted headers contribute nothing; bail out before touching
    # the file system so repeated includes cost neither a lookup nor a read.
    if header in covered_headers:
        return []

    # Locate and load header contents.
    path = find_include_path(header, search_paths)
    with path.open() as f:
        lines = [x.rstrip() for x in f]

    print(f'Processing header "{header}"')
    # Mark as covered before recursing so a header that (transitively)
    # includes itself is not expanded again.
    covered_headers.add(header)

    # Print the header we emit next & the include stack (if non-root).
    include_stack = []
    if stack:
        include_stack = [
            '//',
            '// Include stack:',
            *(f'//   - {x}' for x in stack),
        ]

    filtered = [
        '',
        '//',
        f'// Header: {header}',
        *include_stack,
        '//',
        '',
    ]

    # Copy over lines and recursively inline all headers.
    for line in lines:
        match = INCLUDE_REGEXP.match(line)
        if not match:
            filtered.append(line)
            continue

        # Recurse into includes.
        filtered += merge_headers(
            header=match.group(1),
            search_paths=search_paths,
            covered_headers=covered_headers,
            stack=stack + [header],
        )

    return filtered


def merge_sources(*, source_dir: Path, covered_headers: Set[Path]):
    """Concatenate every ``.c`` file below ``source_dir`` into one line list.

    The result starts with ``#include <Zydis.h>``. Each source file is
    preceded by a comment banner naming it, and its lines are copied with
    trailing whitespace stripped. Lines matching ``INCLUDE_REGEXP`` are
    dropped when the named header is already in ``covered_headers``;
    otherwise they are replaced by the merged header contents, looked up in
    the public and internal include directories.

    ``covered_headers`` is shared with ``merge_headers``, which adds every
    header it emits. Membership is tested with the include path string
    captured from the ``#include`` line.

    Returns:
        The list of output lines for the amalgamated source file.
    """
    header_search_paths = PUBLIC_INCLUDE_PATHS + INTERNAL_INCLUDE_PATHS
    merged = ['#include <Zydis.h>', '']

    for source_file in glob_in_dir('**/*.c', source_dir):
        print(f'Processing source file "{source_file}"')

        # Banner telling readers of the output where the code came from.
        merged.extend((
            '',
            '//',
            f'// Source file: {source_file}',
            '//',
            '',
        ))

        # Load the whole file up front, dropping trailing whitespace.
        with (source_dir / source_file).open() as f:
            source_lines = [raw.rstrip() for raw in f]

        for source_line in source_lines:
            include = INCLUDE_REGEXP.match(source_line)
            if include is None:
                # Anything that is not a matching include is copied verbatim.
                merged.append(source_line)
                continue

            included = include.group(1)
            if included in covered_headers:
                # Already emitted earlier; drop the include line.
                continue

            looks_internal = 'Internal' in included or 'Generated' in included
            if not looks_internal:
                print(
                    f'WARN: Including header that looks like it is public '
                    f'and should thus already be covered by `Zydis.h` '
                    f'during processing of source files: {included}'
                )

            print(f'Processing internal header "{included}"')
            merged.extend(merge_headers(
                header=included,
                search_paths=header_search_paths,
                covered_headers=covered_headers,
                stack=[],
            ))

    return merged


def main():
    """Generate the amalgamated ``Zydis.h`` and ``Zydis.c`` in ``OUTPUT_DIR``.

    An existing output directory is deleted and recreated. The header is
    generated first; the set of headers it covered is then handed to the
    source merge so those headers are not emitted a second time.
    """
    if OUTPUT_DIR.exists():
        print('Output directory exists. Deleting.')
        rmtree(OUTPUT_DIR)

    OUTPUT_DIR.mkdir()

    covered_headers = set()

    # Build each file's content before opening it for writing, so a failure
    # while merging does not leave an empty, truncated file behind.
    header_lines = FILE_HEADER + merge_headers(
        header='Zydis/Zydis.h',
        search_paths=PUBLIC_INCLUDE_PATHS,
        covered_headers=covered_headers,
        stack=[],
    )
    with open(OUTPUT_DIR / 'Zydis.h', 'w') as f:
        # Terminate the last line: C requires a non-empty source file to end
        # in a newline, and compilers warn when it is missing.
        f.write('\n'.join(header_lines) + '\n')

    source_lines = FILE_HEADER + merge_sources(
        source_dir=ZYDIS_ROOT / 'src',
        covered_headers=covered_headers,
    )
    with open(OUTPUT_DIR / 'Zydis.c', 'w') as f:
        f.write('\n'.join(source_lines) + '\n')


# Run the amalgamation only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()

